
library(tidyverse)
library(rio)
library(cregg)
library(patchwork)

# set replication folder as working directory
setwd("~replication")

load("data_genderedcost_long.rdata")

'%!in%' <- function(x,y)!('%in%'(x,y))

# only include completed answers - and answers given before deadline
# 2021-12-20 21:49:52 was the last response within the time frame
df_long <- df_long %>% 
  filter(SurveyStatus==2)

df_long <- df_long %>% 
  filter(SurveyEndTime<="2021-12-20 21:49:52")

### FIGURE 7
## With and without dominant options

df_men <- df_long %>% 
  dplyr::filter(sex=="Man")

mm_men <- cregg::mm(df_men, # data
                    choice~position + remuneration + workload + work_environment, # formula
                    id = id)

mm_men$sex <- "Men - All choices (main)"

df_women <- df_long %>% 
  dplyr::filter(sex=="Woman")

mm_women <- cregg::mm(df_women, # data
                      choice~position + remuneration + workload + work_environment, # formula
                      id = id)

mm_women$sex <- "Women - All choices (main)"

### without dominant
df_men_nodom <- df_long %>% 
  dplyr::filter(dominant_exposure==0) %>%
  dplyr::filter(sex=="Man")

mm_men_nodom <- cregg::mm(df_men_nodom, # data
                          choice~position + remuneration + workload + work_environment, # formula
                          id = id)

mm_men_nodom$sex <- "Men - No dominant options"

df_women_nodom <- df_long %>% 
  dplyr::filter(dominant_exposure==0) %>%
  dplyr::filter(sex=="Woman")

mm_women_nodom <- cregg::mm(df_women_nodom, # data
                            choice~position + remuneration + workload + work_environment, # formula
                            id = id)

mm_women_nodom$sex <- "Women - No dominant options"

#combine estimates from the two subsets, rename to plurals and rearrange levels of feature factor
mm_sex_subsets_nodom <- bind_rows(mm_men, mm_women, mm_men_nodom, mm_women_nodom) %>%
  mutate(sex = factor(sex, levels = c("Women - No dominant options",
                                      "Men - No dominant options",
                                      "Women - All choices (main)",
                                      "Men - All choices (main)"))) %>% 
  mutate(feature = case_when(feature=="work_environment"~"Working Environment",
                             feature=="workload"~"Workload",
                             feature=="remuneration"~"Remuneration",
                             feature=="position"~"Position")) %>% 
  mutate(feature = factor(feature, levels = c("Position", "Remuneration", "Workload", "Working Environment")))

mm_dominant_plot <- mm_sex_subsets_nodom %>% 
  ggplot(data=., aes(y = level, x = estimate, color = sex, shape = sex, linetype = sex)) +
  geom_point(aes(shape = sex),
             position = position_dodge2(width = 0.8)) +
  geom_linerange(aes(xmin=estimate-std.error*1.39,
                     xmax=estimate+std.error*1.39),
                 position = position_dodge2(width = 0.8)) +
  xlab("Marginal mean") +
  ylab("") +
  scale_x_continuous(labels = seq(0.3,0.8,0.05), breaks = seq(0.3,0.8,0.05)) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  theme_bw() +
  coord_cartesian(xlim = c(0.35,0.73)) +
  scale_color_manual("", values = c("gray", "black", "gray", "black")) +
  scale_shape_manual("", values=c(2,1,17,16)) + #(16,17,1,2)
  scale_linetype_manual("", values = c(2,2,1,1)) +
  facet_wrap(~feature, scales = "free_y", ncol = 1) +
  theme(legend.position = "top", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  guides(color=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE),
         shape=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE),
         linetype=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE)) 

mm_dominant_plot


## estimate differences in marginal means
# whole sample
mm_sex_differences <- cregg::mm_diffs(df_long, # data
                                      choice~position + remuneration + workload + work_environment, # formula
                                      ~sex)

mm_sex_differences$nodom <- "All choices"

# non-dominant
df_long_nodom <- df_long %>% 
  dplyr::filter(dominant_exposure==0)

mm_sex_differences_nodom <- cregg::mm_diffs(df_long_nodom, # data
                                            choice~position + remuneration + workload + work_environment, # formula
                                            ~sex)

mm_sex_differences_nodom$nodom <- "No dominant options"

mm_differences_nodom <- bind_rows(mm_sex_differences,mm_sex_differences_nodom) %>%
  mutate(feature = case_when(feature=="work_environment"~"Working Environment",
                             feature=="workload"~"Workload",
                             feature=="remuneration"~"Remuneration",
                             feature=="position"~"Position")) %>% 
  mutate(feature = factor(feature, levels = c("Position", "Remuneration", "Workload", "Working Environment"))) %>%
  mutate(nodom = factor(nodom, levels = c("No dominant options","All choices")))

mm_dominant_differences_plot <- 
  mm_differences_nodom %>% 
  ggplot(data=., aes(y = level, x = estimate, shape = nodom, linetype = nodom)) +
  geom_point(color = "black", position = position_dodge2(width = 0.8)) +
  geom_linerange(aes(xmin=lower,
                     xmax=upper),
                 color = "black", position = position_dodge2(width = 0.8)) +
  xlab("Differences\n(women - men)") +
  ylab("") +
  scale_x_continuous(labels = seq(-0.2,0.2,0.05), breaks = seq(-0.2,0.2,0.05)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  theme_bw() +
  scale_linetype_manual("", values = c(2,1)) +
  scale_shape_manual("", values = c(1,16)) +
  facet_wrap(~feature, scales = "free_y", ncol = 1) +
  coord_cartesian(xlim = c(-0.1,0.1)) +
  theme(legend.position = "none", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        axis.text.y=element_blank(), axis.ticks.y=element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  guides(color=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE),
         shape=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE),
         linetype=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE)) 

mm_dominant_differences_plot

mm_dominant_plot + mm_dominant_differences_plot + plot_layout(widths = c(2, 1))


######################
### VICTIMS ##########
######################

df_victims <- df_long %>% 
  dplyr::filter(victim=="Victim")

mm_victims <- cregg::mm(df_victims, # data
                        choice~position + remuneration + workload + work_environment, # formula
                        id = id)

mm_victims$victim <- "Victim - All choices (main)"

df_nonvictims <- df_long %>% 
  dplyr::filter(victim=="Non-victim")

mm_nonvictims <- cregg::mm(df_nonvictims, # data
                           choice~position + remuneration + workload + work_environment, # formula
                           id = id)

mm_nonvictims$victim <- "Non-victim - All choices (main)"

### remove dominant choices
df_victims_nodom <- df_long %>% 
  dplyr::filter(dominant_exposure==0) %>%
  dplyr::filter(victim=="Victim")


mm_victims_nodom <- cregg::mm(df_victims_nodom, # data
                              choice~position + remuneration + workload + work_environment, # formula
                              id = id)

mm_victims_nodom$victim <- "Victim - No dominant options"

df_nonvictims_nodom <- df_long %>% 
  dplyr::filter(dominant_exposure==0) %>%
  dplyr::filter(victim=="Non-victim")

mm_nonvictims_nodom <- cregg::mm(df_nonvictims_nodom, # data
                                 choice~position + remuneration + workload + work_environment, # formula
                                 id = id)

mm_nonvictims_nodom$victim <- "Non-victim - No dominant options"

#combine estimates from the two subsets, add attribute variable for color and rearrange order of levels
mm_victim_subsets <- bind_rows(mm_victims, mm_nonvictims, mm_victims_nodom, mm_nonvictims_nodom) %>% 
  mutate(victim = factor(victim, levels = c("Victim - No dominant options",
                                            "Non-victim - No dominant options",
                                            "Victim - All choices (main)",
                                            "Non-victim - All choices (main)"))) %>%
  mutate(feature = case_when(feature=="work_environment"~"Working Environment",
                             feature=="workload"~"Workload",
                             feature=="remuneration"~"Remuneration",
                             feature=="position"~"Position")) %>% 
  mutate(feature = factor(feature, levels = c("Position", "Remuneration", "Workload", "Working Environment")))


# create plot for only work environment
mm_dominant_plot_victim <- mm_victim_subsets %>% 
  filter(feature=="Working Environment") %>% 
  ggplot(data=., aes(y = level, x = estimate, color = victim, shape = victim, linetype = victim)) +
  geom_point(position = position_dodge2(width = 0.8)) +
  geom_linerange(aes(xmin=estimate-std.error*1.39,
                     xmax=estimate+std.error*1.39),
                 position = position_dodge2(width = 0.8)) +
  xlab("Marginal mean") +
  ylab("") +
  scale_x_continuous(labels = seq(0.3,0.8,0.05), breaks = seq(0.3,0.8,0.05)) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  theme_bw() +
  coord_cartesian(xlim = c(0.35,0.73)) +
  scale_color_manual("", values = c("gray", "black", "gray", "black")) +
  scale_shape_manual("", values=c(2,1,17,16)) + #(16,17,1,2)
  scale_linetype_manual("", values = c(2,2,1,1)) +
  scale_alpha_manual("", values = c(0.5,1)) +
  facet_wrap(~feature, scales = "free_y", ncol = 1) +
  theme(legend.position = "top", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  guides(color=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE),
         shape=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE),
         linetype=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE)) 

mm_dominant_plot_victim


## estimate differences in marginal means
# whole sample
mm_victim_differences <- cregg::mm_diffs(df_long, # data
                                         choice~position + remuneration + workload + work_environment, # formula
                                         ~victim)

mm_victim_differences$nodom <- "All choices"

# non-dominant
mm_victim_differences_nodom <- cregg::mm_diffs(df_long_nodom, # data
                                               choice~position + remuneration + workload + work_environment, # formula
                                               ~victim)

mm_victim_differences_nodom$nodom <- "No dominant options"

mm_differences_victim_nodom <- bind_rows(mm_victim_differences,mm_victim_differences_nodom) %>%
  mutate(feature = case_when(feature=="work_environment"~"Working Environment",
                             feature=="workload"~"Workload",
                             feature=="remuneration"~"Remuneration",
                             feature=="position"~"Position")) %>% 
  mutate(feature = factor(feature, levels = c("Position", "Remuneration", "Workload", "Working Environment"))) %>%
  mutate(nodom = factor(nodom, levels = c("No dominant options","All choices")))

mm_dominant_plot_victim_dif <- mm_differences_nodom %>% 
  filter(feature=="Working Environment") %>% 
  ggplot(data=., aes(y = level, x = estimate, shape = nodom, linetype = nodom)) +
  geom_point(color = "black", position = position_dodge2(width = 0.8)) +
  geom_linerange(aes(xmin=lower,
                     xmax=upper),
                 color = "black", position = position_dodge2(width = 0.8)) +
  xlab("Differences\n(victims - non-victims)") +
  ylab("") +
  scale_x_continuous(labels = seq(-0.2,0.2,0.05), breaks = seq(-0.2,0.2,0.05)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  theme_bw() +
  scale_linetype_manual("", values = c(2,1)) +
  scale_shape_manual("", values = c(1,16)) +
  facet_wrap(~feature, scales = "free_y", ncol = 1) +
  coord_cartesian(xlim = c(-0.1,0.1)) +
  theme(legend.position = "none", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        axis.text.y=element_blank(), axis.ticks.y=element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  guides(color=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE),
         shape=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE),
         linetype=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE)) 

mm_dominant_plot_victim + mm_dominant_plot_victim_dif + plot_layout(widths = c(2, 1))


### FIGURE 7 one big plot!
mm_dominant_plot + mm_dominant_differences_plot + mm_dominant_plot_victim + mm_dominant_plot_victim_dif + plot_layout(widths = c(3, 1), heights = c(4.5,1))


ggsave("figure7.pdf", height = 11.25, width = 10)
ggsave("figure7.png", height = 11.25, width = 10, dpi = 600)
